%load_ext autoreload
%autoreload 2
from IPython.display import display
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
import os
import pandas as pd
import numpy as np
import pymc3 as pm
import arviz as az
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import pearsonr
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from modules.data_handling import ExperimentParser, merge_sav_file
from modules.stats import build_logistic_model, build_pearson_model
# Aesthetics
def sns_styleset():
    """Set the global seaborn theme and matplotlib rcParams used by all figures."""
    sns.set_theme(context='paper',
                  style='whitegrid',
                  palette='deep',
                  font='Arial')
    # Apply every rcParams override in a single pass
    matplotlib.rcParams.update({
        'figure.dpi': 150,
        'axes.linewidth': 1,
        'grid.color': '.8',
        'axes.edgecolor': '.15',
        'xtick.bottom': True,
        'ytick.left': True,
        'xtick.major.width': 1,
        'ytick.major.width': 1,
        'xtick.color': '.15',
        'ytick.color': '.15',
        'xtick.major.size': 3,
        'ytick.major.size': 3,
        'font.size': 10,
        'axes.titlesize': 10,
        'axes.labelsize': 10,
        'legend.fontsize': 10,
        'legend.frameon': False,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
    })
# Apply the plotting style globally for the rest of the notebook
sns_styleset()
# Human-readable labels for the experimental-design codes:
# 'format' strings ('disease'/'weather') and 'fnoa_order' codes (1/2)
MAPPING = {
    'disease': 'Disease',
    'weather': 'Weather',
    1: 'FnoA First',
    2: 'FnoA Second'
}
# Parse the raw .dat files of the 40-subject learning task
parser = ExperimentParser(
    experiment_dir='data//learning_task_40_s//'
)
parser.parse_dat_files()
# Display the full parsed dataset (cell output below)
parser.get_data()
0%| | 0/40 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
18%|██████████████▌ | 7/40 [00:00<00:00, 69.83it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
35%|████████████████████████████▋ | 14/40 [00:00<00:00, 63.97it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
52%|███████████████████████████████████████████ | 21/40 [00:00<00:00, 58.58it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
68%|███████████████████████████████████████████████████████▎ | 27/40 [00:00<00:00, 57.70it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
82%|███████████████████████████████████████████████████████████████████▋ | 33/40 [00:00<00:00, 55.55it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
0%| | 0/147 [00:00<?, ?it/s]
98%|███████████████████████████████████████████████████████████████████████████████▉ | 39/40 [00:00<00:00, 51.87it/s]
0%| | 0/147 [00:00<?, ?it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 55.76it/s]
| subno | fnoa_order | format | counterbal | condition | trial_n | phase | include | cue_1 | cue_2 | cue_3 | cue_4 | prob_outcome_1 | outcome | resp_given | resp_norm_corr | resp_time | resp_rewar | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | 2 | weather | 1 | AnoF | 1.0 | Training | 1 | 1.0 | 0.0 | 0.0 | 0.0 | 0.75 | 2.0 | None | 1.0 | None | None |
| 1 | s1 | 2 | weather | 1 | AnoF | 2.0 | Training | 1 | 0.0 | 0.0 | 1.0 | 1.0 | 0.00 | 2.0 | None | 2.0 | None | None |
| 2 | s1 | 2 | weather | 1 | AnoF | 3.0 | Training | 1 | 1.0 | 0.0 | 0.0 | 0.0 | 0.75 | 1.0 | None | 1.0 | None | None |
| 3 | s1 | 2 | weather | 1 | AnoF | 4.0 | Training | 1 | 0.0 | 0.0 | 1.0 | 0.0 | 0.25 | 1.0 | None | 2.0 | None | None |
| 4 | s1 | 2 | weather | 1 | AnoF | 5.0 | Training | 1 | 0.0 | 0.0 | 1.0 | 0.0 | 0.25 | 2.0 | None | 2.0 | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5425 | s9 | 2 | disease | 1 | FnoA | 66.0 | Test | 1 | 1.0 | 0.0 | 1.0 | 1.0 | 0.25 | NaN | 2.0 | 2.0 | 1.007 | None |
| 5426 | s9 | 2 | disease | 1 | FnoA | 67.0 | Test | 1 | 0.0 | 1.0 | 1.0 | 1.0 | 0.25 | NaN | 1.0 | 2.0 | 3.42 | None |
| 5427 | s9 | 2 | disease | 1 | FnoA | 68.0 | Test | 1 | 1.0 | 0.0 | 0.0 | 0.0 | 0.75 | NaN | 1.0 | 1.0 | 1.09 | None |
| 5428 | s9 | 2 | disease | 1 | FnoA | 69.0 | Test | 1 | 1.0 | 1.0 | 1.0 | 0.0 | 0.75 | NaN | 1.0 | 1.0 | 2.295 | None |
| 5429 | s9 | 2 | disease | 1 | FnoA | 70.0 | Test | 1 | 1.0 | 0.0 | 0.0 | 1.0 | 0.50 | NaN | 1.0 | NaN | 2.339 | None |
5430 rows × 18 columns
# Keep only the FnoA-condition, Test-phase trials
fnoa_test = parser.get_data(filters={'condition': 'FnoA', 'phase': 'Test'})
# We first remove (i.e. discount) data when responses are too fast
# As indicated by point a in the further information
# section of the assignment document
# NOTE(review): np.percentile expects q in PERCENT (0-100), so q=0.01 is the
# 0.01th percentile — essentially the minimum (only 1 of 560 rows is dropped
# here). If the intent was a 1% cutoff, this should be np.percentile(..., 1)
# or np.quantile(..., 0.01) — confirm against the assignment document.
lower_limit = np.percentile(fnoa_test['resp_time'], 0.01)
fnoa_test = fnoa_test[fnoa_test['resp_time'] > lower_limit]
fnoa_test
| subno | fnoa_order | format | counterbal | condition | trial_n | phase | include | cue_1 | cue_2 | cue_3 | cue_4 | prob_outcome_1 | outcome | resp_given | resp_norm_corr | resp_time | resp_rewar | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 122 | s1 | 2 | disease | 1 | FnoA | 57.0 | Test | 1 | 1.0 | 1.0 | 0.0 | 0.0 | 1.00 | NaN | 2.0 | 1.0 | 1.05 | None |
| 123 | s1 | 2 | disease | 1 | FnoA | 58.0 | Test | 1 | 0.0 | 1.0 | 0.0 | 0.0 | 0.75 | NaN | 2.0 | 1.0 | 1.035 | None |
| 124 | s1 | 2 | disease | 1 | FnoA | 59.0 | Test | 1 | 1.0 | 0.0 | 1.0 | 0.0 | 0.50 | NaN | 1.0 | NaN | 0.876 | None |
| 125 | s1 | 2 | disease | 1 | FnoA | 60.0 | Test | 1 | 0.0 | 0.0 | 1.0 | 0.0 | 0.25 | NaN | 2.0 | 2.0 | 1.383 | None |
| 126 | s1 | 2 | disease | 1 | FnoA | 61.0 | Test | 1 | 0.0 | 0.0 | 0.0 | 1.0 | 0.25 | NaN | 1.0 | 2.0 | 0.695 | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5425 | s9 | 2 | disease | 1 | FnoA | 66.0 | Test | 1 | 1.0 | 0.0 | 1.0 | 1.0 | 0.25 | NaN | 2.0 | 2.0 | 1.007 | None |
| 5426 | s9 | 2 | disease | 1 | FnoA | 67.0 | Test | 1 | 0.0 | 1.0 | 1.0 | 1.0 | 0.25 | NaN | 1.0 | 2.0 | 3.42 | None |
| 5427 | s9 | 2 | disease | 1 | FnoA | 68.0 | Test | 1 | 1.0 | 0.0 | 0.0 | 0.0 | 0.75 | NaN | 1.0 | 1.0 | 1.09 | None |
| 5428 | s9 | 2 | disease | 1 | FnoA | 69.0 | Test | 1 | 1.0 | 1.0 | 1.0 | 0.0 | 0.75 | NaN | 1.0 | 1.0 | 2.295 | None |
| 5429 | s9 | 2 | disease | 1 | FnoA | 70.0 | Test | 1 | 1.0 | 0.0 | 0.0 | 1.0 | 0.50 | NaN | 1.0 | NaN | 2.339 | None |
559 rows × 18 columns
# we now derive the number of normatively correct responses
# the total number of normatively valid responses
# and the relative ratio
performance = fnoa_test[fnoa_test['resp_given'] == fnoa_test['resp_norm_corr']].groupby('subno').agg('size')
performance = pd.DataFrame(performance, columns=['n_norm_corr']).reset_index()
# NOTE(review): assigning raw `.values` relies on both groupbys producing the
# same subject ordering (groupby sorts keys by default) AND on every subject
# appearing in both groupings — if any subject had zero correct responses the
# rows would silently misalign; verify this holds for the data.
performance['total_norm_corr'] = fnoa_test[~fnoa_test['resp_norm_corr'].isnull()].groupby('subno').agg('size').values
performance['ratio_norm_corr'] = performance['n_norm_corr'] / performance['total_norm_corr']
# get info from the experimental design
# (each subject has a single format / fnoa_order value, so unique() yields a
# one-element array and x[0] extracts it)
performance['format'] = fnoa_test.groupby('subno')['format'].unique().values
performance['format'] = performance['format'].apply(lambda x: MAPPING[x[0]])
performance['fnoa_order'] = fnoa_test.groupby('subno')['fnoa_order'].unique().values
performance['fnoa_order'] = performance['fnoa_order'].apply(lambda x: MAPPING[int(x[0])])
# we now merge with the personality data
performance = merge_sav_file(
    left_df=performance,
    path='data//sav_files//learning_task_40_s.sav',
    keys=['subno']
)
performance.head(10)
| subno | n_norm_corr | total_norm_corr | ratio_norm_corr | format | fnoa_order | age | ue | cd | ian | inc | epqe | epqp | bastot | nstot | harmtot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | 4 | 9 | 0.444444 | Disease | FnoA Second | 25.0 | 6.0 | 16.0 | 13.0 | 14.0 | 6.0 | 7.0 | 35.0 | 26.0 | 27.0 |
| 1 | s10 | 8 | 10 | 0.800000 | Weather | FnoA First | 23.0 | 22.0 | 18.0 | 5.0 | 20.0 | 19.0 | 11.0 | 42.0 | 27.0 | 18.0 |
| 2 | s11 | 6 | 10 | 0.600000 | Weather | FnoA Second | 24.0 | 15.0 | 14.0 | 1.0 | 15.0 | 18.0 | 4.0 | 41.0 | 26.0 | 11.0 |
| 3 | s12 | 4 | 10 | 0.400000 | Disease | FnoA First | 24.0 | 8.0 | 14.0 | 16.0 | 9.0 | 8.0 | 1.0 | 32.0 | 11.0 | 15.0 |
| 4 | s13 | 6 | 10 | 0.600000 | Disease | FnoA Second | 21.0 | 12.0 | 8.0 | 6.0 | 14.0 | 14.0 | 8.0 | 49.0 | 20.0 | 5.0 |
| 5 | s14 | 7 | 10 | 0.700000 | Disease | FnoA Second | 22.0 | 7.0 | 6.0 | 1.0 | 7.0 | 18.0 | 2.0 | 37.0 | 20.0 | 13.0 |
| 6 | s15 | 4 | 10 | 0.400000 | Weather | FnoA First | 25.0 | 1.0 | 3.0 | 3.0 | 10.0 | 9.0 | 5.0 | 37.0 | 17.0 | 12.0 |
| 7 | s16 | 3 | 10 | 0.300000 | Weather | FnoA Second | 24.0 | 11.0 | 8.0 | 15.0 | 14.0 | 17.0 | 10.0 | 29.0 | 9.0 | 24.0 |
| 8 | s17 | 6 | 10 | 0.600000 | Disease | FnoA First | 25.0 | 1.0 | 6.0 | 5.0 | 8.0 | 11.0 | 4.0 | 37.0 | 16.0 | 11.0 |
| 9 | s18 | 6 | 10 | 0.600000 | Weather | FnoA First | 25.0 | 8.0 | 3.0 | 0.0 | 11.0 | 18.0 | 3.0 | 34.0 | 23.0 | 11.0 |
# we save all the data locally as requested in the assignment
# in a format suitable for being read by SPSS
parser.get_data().to_csv('results//datasets//full_dataset.csv', index=False)
fnoa_test.to_csv('results//datasets//fnoa_test_dataset.csv', index=False)
# NOTE(review): 'perfromance' is a typo in the output filename; left unchanged
# because downstream scripts may already reference this exact path.
performance.to_csv('results//datasets//perfromance_personality.csv', index=False)
# Summary statistics of the raw count of normatively correct responses
performance['n_norm_corr'].describe()
count 40.00000 mean 5.82500 std 1.72296 min 2.00000 25% 5.00000 50% 6.00000 75% 7.00000 max 10.00000 Name: n_norm_corr, dtype: float64
# Summary statistics of the proportion of normatively correct responses
performance['ratio_norm_corr'].describe()
count 40.000000 mean 0.583611 std 0.171229 min 0.200000 25% 0.500000 50% 0.600000 75% 0.700000 max 1.000000 Name: ratio_norm_corr, dtype: float64
# Accuracy summaries broken down by the 2x2 design cells (format x FnoA order)
performance.groupby(['format', 'fnoa_order'])['ratio_norm_corr'].describe()
| count | mean | std | min | 25% | 50% | 75% | max | ||
|---|---|---|---|---|---|---|---|---|---|
| format | fnoa_order | ||||||||
| Disease | FnoA First | 10.0 | 0.600000 | 0.194365 | 0.300000 | 0.500 | 0.60 | 0.675 | 0.9 |
| FnoA Second | 10.0 | 0.614444 | 0.101166 | 0.444444 | 0.525 | 0.65 | 0.700 | 0.7 | |
| Weather | FnoA First | 10.0 | 0.520000 | 0.198886 | 0.200000 | 0.400 | 0.55 | 0.600 | 0.8 |
| FnoA Second | 10.0 | 0.600000 | 0.182574 | 0.300000 | 0.500 | 0.60 | 0.675 | 1.0 |
# Personality / demographic covariates to correlate with task accuracy
covariates = ['age', 'ue', 'cd', 'ian', 'inc', 'epqe', 'epqp', 'bastot', 'nstot', 'harmtot']
rhos = []
hdi_low = []
hdi_high = []
for covariate in covariates:
    # standardize so to make default priors sensible
    standardized = (
        performance[['ratio_norm_corr', covariate]] - performance[['ratio_norm_corr', covariate]].mean()
    ) / performance[['ratio_norm_corr', covariate]].std()
    # build the Bayesian Pearson-correlation model for this covariate
    pearson_model = build_pearson_model(
        X='ratio_norm_corr',
        y=covariate,
        df=standardized
    )
    # sample from the posterior
    with pearson_model:
        trace = pm.sample(
            cores=1,
            return_inferencedata=False,
            chains=4
        )
    # we check if the gelman-rubin statistic is sensible
    # and if the traceplots look reasonable
    display(pm.summary(trace))
    pm.plot_trace(trace)
    plt.show()
    rhos.append(trace['Rho'].mean())
    # BUGFIX: pm.hdi defaults to a 94% HDI (ArviZ hdi_prob=0.94) — see the
    # hdi_3%/hdi_97% columns in the summaries above — so the previous bounds
    # did not match the 'HDI 2.5%' / 'HDI 97.5%' labels below. Request the
    # 95% interval explicitly.
    lower, upper = pm.hdi(trace['Rho'], hdi_prob=0.95)
    hdi_low.append(lower)
    hdi_high.append(upper)
# Collect posterior means and 95% HDI bounds per covariate
corr_results = pd.DataFrame()
corr_results['Covariate'] = covariates
corr_results['Mean Pearson R'] = rhos
corr_results['HDI 2.5%'] = hdi_low
corr_results['HDI 97.5%'] = hdi_high
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 22 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | -0.001 | 0.162 | -0.295 | 0.319 | 0.002 | 0.003 | 5514.0 | 2953.0 | 1.0 |
| Mus[1] | -0.001 | 0.162 | -0.307 | 0.296 | 0.002 | 0.002 | 5218.0 | 2822.0 | 1.0 |
| Sigmas[0] | 1.029 | 0.122 | 0.813 | 1.264 | 0.002 | 0.001 | 6099.0 | 3014.0 | 1.0 |
| Sigmas[1] | 1.029 | 0.120 | 0.800 | 1.231 | 0.002 | 0.001 | 6495.0 | 3252.0 | 1.0 |
| Rho | -0.069 | 0.139 | -0.326 | 0.192 | 0.002 | 0.002 | 6142.0 | 3245.0 | 1.0 |
| Covariance[0,0] | 1.074 | 0.260 | 0.616 | 1.550 | 0.004 | 0.003 | 6099.0 | 3014.0 | 1.0 |
| Covariance[0,1] | -0.074 | 0.155 | -0.376 | 0.211 | 0.002 | 0.002 | 5661.0 | 3036.0 | 1.0 |
| Covariance[1,0] | -0.074 | 0.155 | -0.376 | 0.211 | 0.002 | 0.002 | 5661.0 | 3036.0 | 1.0 |
| Covariance[1,1] | 1.074 | 0.256 | 0.639 | 1.516 | 0.003 | 0.003 | 6495.0 | 3252.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | 0.001 | 0.160 | -0.310 | 0.284 | 0.002 | 0.002 | 4578.0 | 3234.0 | 1.0 |
| Mus[1] | -0.001 | 0.160 | -0.297 | 0.296 | 0.002 | 0.002 | 4577.0 | 3292.0 | 1.0 |
| Sigmas[0] | 1.022 | 0.119 | 0.803 | 1.245 | 0.002 | 0.001 | 5575.0 | 3048.0 | 1.0 |
| Sigmas[1] | 1.027 | 0.125 | 0.815 | 1.270 | 0.002 | 0.001 | 4307.0 | 2515.0 | 1.0 |
| Rho | 0.123 | 0.133 | -0.130 | 0.365 | 0.002 | 0.002 | 5130.0 | 3076.0 | 1.0 |
| Covariance[0,0] | 1.058 | 0.254 | 0.635 | 1.538 | 0.004 | 0.003 | 5575.0 | 3048.0 | 1.0 |
| Covariance[0,1] | 0.130 | 0.149 | -0.156 | 0.414 | 0.002 | 0.002 | 4578.0 | 3215.0 | 1.0 |
| Covariance[1,0] | 0.130 | 0.149 | -0.156 | 0.414 | 0.002 | 0.002 | 4578.0 | 3215.0 | 1.0 |
| Covariance[1,1] | 1.070 | 0.267 | 0.617 | 1.559 | 0.004 | 0.003 | 4307.0 | 2515.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 20 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | -0.005 | 0.158 | -0.311 | 0.294 | 0.002 | 0.002 | 4555.0 | 2967.0 | 1.0 |
| Mus[1] | 0.002 | 0.161 | -0.286 | 0.323 | 0.002 | 0.002 | 4207.0 | 3075.0 | 1.0 |
| Sigmas[0] | 1.022 | 0.116 | 0.807 | 1.228 | 0.002 | 0.001 | 4623.0 | 3030.0 | 1.0 |
| Sigmas[1] | 1.023 | 0.118 | 0.804 | 1.238 | 0.002 | 0.001 | 5123.0 | 2843.0 | 1.0 |
| Rho | 0.130 | 0.131 | -0.112 | 0.371 | 0.002 | 0.002 | 4995.0 | 3302.0 | 1.0 |
| Covariance[0,0] | 1.058 | 0.246 | 0.641 | 1.492 | 0.004 | 0.003 | 4623.0 | 3030.0 | 1.0 |
| Covariance[0,1] | 0.137 | 0.147 | -0.141 | 0.400 | 0.002 | 0.002 | 4551.0 | 3454.0 | 1.0 |
| Covariance[1,0] | 0.137 | 0.147 | -0.141 | 0.400 | 0.002 | 0.002 | 4551.0 | 3454.0 | 1.0 |
| Covariance[1,1] | 1.061 | 0.251 | 0.636 | 1.516 | 0.004 | 0.003 | 5123.0 | 2843.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 20 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | -0.003 | 0.158 | -0.290 | 0.302 | 0.002 | 0.002 | 4744.0 | 3039.0 | 1.0 |
| Mus[1] | -0.003 | 0.159 | -0.292 | 0.297 | 0.002 | 0.002 | 4159.0 | 3253.0 | 1.0 |
| Sigmas[0] | 1.014 | 0.112 | 0.825 | 1.233 | 0.002 | 0.001 | 4841.0 | 2932.0 | 1.0 |
| Sigmas[1] | 1.015 | 0.118 | 0.816 | 1.243 | 0.002 | 0.001 | 4818.0 | 2649.0 | 1.0 |
| Rho | -0.235 | 0.128 | -0.464 | 0.008 | 0.002 | 0.001 | 4376.0 | 3203.0 | 1.0 |
| Covariance[0,0] | 1.040 | 0.234 | 0.664 | 1.498 | 0.003 | 0.003 | 4841.0 | 2932.0 | 1.0 |
| Covariance[0,1] | -0.245 | 0.148 | -0.530 | 0.020 | 0.002 | 0.002 | 4062.0 | 3064.0 | 1.0 |
| Covariance[1,0] | -0.245 | 0.148 | -0.530 | 0.020 | 0.002 | 0.002 | 4062.0 | 3064.0 | 1.0 |
| Covariance[1,1] | 1.044 | 0.248 | 0.643 | 1.517 | 0.004 | 0.003 | 4818.0 | 2649.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | 0.001 | 0.159 | -0.294 | 0.293 | 0.002 | 0.002 | 4710.0 | 3350.0 | 1.0 |
| Mus[1] | -0.002 | 0.164 | -0.302 | 0.314 | 0.002 | 0.003 | 4518.0 | 3199.0 | 1.0 |
| Sigmas[0] | 1.025 | 0.119 | 0.810 | 1.244 | 0.002 | 0.001 | 3799.0 | 2561.0 | 1.0 |
| Sigmas[1] | 1.030 | 0.127 | 0.812 | 1.276 | 0.002 | 0.001 | 5567.0 | 3064.0 | 1.0 |
| Rho | 0.100 | 0.135 | -0.146 | 0.358 | 0.002 | 0.002 | 5607.0 | 2970.0 | 1.0 |
| Covariance[0,0] | 1.065 | 0.254 | 0.646 | 1.539 | 0.004 | 0.003 | 3799.0 | 2561.0 | 1.0 |
| Covariance[0,1] | 0.107 | 0.151 | -0.169 | 0.402 | 0.002 | 0.002 | 5251.0 | 2995.0 | 1.0 |
| Covariance[1,0] | 0.107 | 0.151 | -0.169 | 0.402 | 0.002 | 0.002 | 5251.0 | 2995.0 | 1.0 |
| Covariance[1,1] | 1.077 | 0.273 | 0.621 | 1.582 | 0.004 | 0.003 | 5567.0 | 3064.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | 0.000 | 0.163 | -0.285 | 0.318 | 0.002 | 0.002 | 4512.0 | 3404.0 | 1.0 |
| Mus[1] | 0.002 | 0.160 | -0.309 | 0.293 | 0.002 | 0.002 | 4301.0 | 3084.0 | 1.0 |
| Sigmas[0] | 1.022 | 0.120 | 0.818 | 1.249 | 0.002 | 0.001 | 4614.0 | 3272.0 | 1.0 |
| Sigmas[1] | 1.018 | 0.116 | 0.816 | 1.235 | 0.002 | 0.001 | 4756.0 | 2944.0 | 1.0 |
| Rho | 0.188 | 0.131 | -0.066 | 0.421 | 0.002 | 0.001 | 5257.0 | 3052.0 | 1.0 |
| Covariance[0,0] | 1.058 | 0.253 | 0.637 | 1.528 | 0.004 | 0.003 | 4614.0 | 3272.0 | 1.0 |
| Covariance[0,1] | 0.198 | 0.149 | -0.080 | 0.469 | 0.002 | 0.002 | 4766.0 | 3066.0 | 1.0 |
| Covariance[1,0] | 0.198 | 0.149 | -0.080 | 0.469 | 0.002 | 0.002 | 4766.0 | 3066.0 | 1.0 |
| Covariance[1,1] | 1.050 | 0.245 | 0.665 | 1.525 | 0.004 | 0.003 | 4756.0 | 2944.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | -0.000 | 0.162 | -0.320 | 0.299 | 0.002 | 0.003 | 5394.0 | 3145.0 | 1.0 |
| Mus[1] | -0.000 | 0.162 | -0.292 | 0.315 | 0.002 | 0.002 | 5159.0 | 3099.0 | 1.0 |
| Sigmas[0] | 1.027 | 0.123 | 0.826 | 1.265 | 0.002 | 0.001 | 5253.0 | 3198.0 | 1.0 |
| Sigmas[1] | 1.029 | 0.127 | 0.804 | 1.262 | 0.002 | 0.001 | 4912.0 | 3062.0 | 1.0 |
| Rho | -0.003 | 0.133 | -0.257 | 0.239 | 0.002 | 0.002 | 5148.0 | 3196.0 | 1.0 |
| Covariance[0,0] | 1.070 | 0.263 | 0.667 | 1.580 | 0.004 | 0.003 | 5253.0 | 3198.0 | 1.0 |
| Covariance[0,1] | -0.004 | 0.148 | -0.265 | 0.293 | 0.002 | 0.002 | 4841.0 | 3068.0 | 1.0 |
| Covariance[1,0] | -0.004 | 0.148 | -0.265 | 0.293 | 0.002 | 0.002 | 4841.0 | 3068.0 | 1.0 |
| Covariance[1,1] | 1.075 | 0.276 | 0.641 | 1.585 | 0.005 | 0.004 | 4912.0 | 3062.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | -0.002 | 0.162 | -0.314 | 0.285 | 0.002 | 0.002 | 4787.0 | 3129.0 | 1.0 |
| Mus[1] | -0.000 | 0.157 | -0.281 | 0.308 | 0.002 | 0.003 | 4327.0 | 3028.0 | 1.0 |
| Sigmas[0] | 1.027 | 0.120 | 0.814 | 1.254 | 0.002 | 0.001 | 4687.0 | 2654.0 | 1.0 |
| Sigmas[1] | 1.022 | 0.116 | 0.833 | 1.265 | 0.002 | 0.001 | 5261.0 | 3026.0 | 1.0 |
| Rho | 0.120 | 0.134 | -0.146 | 0.360 | 0.002 | 0.002 | 4145.0 | 2985.0 | 1.0 |
| Covariance[0,0] | 1.069 | 0.255 | 0.622 | 1.519 | 0.004 | 0.003 | 4687.0 | 2654.0 | 1.0 |
| Covariance[0,1] | 0.127 | 0.149 | -0.147 | 0.423 | 0.002 | 0.002 | 3804.0 | 2968.0 | 1.0 |
| Covariance[1,0] | 0.127 | 0.149 | -0.147 | 0.423 | 0.002 | 0.002 | 3804.0 | 2968.0 | 1.0 |
| Covariance[1,1] | 1.057 | 0.246 | 0.622 | 1.512 | 0.004 | 0.003 | 5261.0 | 3026.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | 0.004 | 0.162 | -0.283 | 0.326 | 0.002 | 0.003 | 4306.0 | 2729.0 | 1.0 |
| Mus[1] | 0.006 | 0.164 | -0.300 | 0.304 | 0.002 | 0.002 | 5302.0 | 3368.0 | 1.0 |
| Sigmas[0] | 1.025 | 0.120 | 0.817 | 1.244 | 0.002 | 0.001 | 4973.0 | 2751.0 | 1.0 |
| Sigmas[1] | 1.027 | 0.123 | 0.817 | 1.263 | 0.002 | 0.001 | 4909.0 | 3200.0 | 1.0 |
| Rho | 0.115 | 0.132 | -0.136 | 0.354 | 0.002 | 0.002 | 5228.0 | 3314.0 | 1.0 |
| Covariance[0,0] | 1.065 | 0.257 | 0.629 | 1.503 | 0.004 | 0.003 | 4973.0 | 2751.0 | 1.0 |
| Covariance[0,1] | 0.122 | 0.149 | -0.160 | 0.405 | 0.002 | 0.002 | 4818.0 | 3422.0 | 1.0 |
| Covariance[1,0] | 0.122 | 0.149 | -0.160 | 0.405 | 0.002 | 0.002 | 4818.0 | 3422.0 | 1.0 |
| Covariance[1,1] | 1.069 | 0.263 | 0.630 | 1.553 | 0.004 | 0.003 | 4909.0 | 3200.0 | 1.0 |
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Rho, Sigmas, Mus]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 21 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Mus[0] | 0.001 | 0.162 | -0.293 | 0.318 | 0.002 | 0.002 | 4886.0 | 2929.0 | 1.0 |
| Mus[1] | 0.001 | 0.162 | -0.318 | 0.295 | 0.002 | 0.003 | 5762.0 | 3234.0 | 1.0 |
| Sigmas[0] | 1.027 | 0.120 | 0.812 | 1.247 | 0.002 | 0.001 | 4611.0 | 3131.0 | 1.0 |
| Sigmas[1] | 1.027 | 0.119 | 0.824 | 1.260 | 0.002 | 0.001 | 4773.0 | 2454.0 | 1.0 |
| Rho | 0.091 | 0.135 | -0.175 | 0.332 | 0.002 | 0.002 | 4514.0 | 2671.0 | 1.0 |
| Covariance[0,0] | 1.069 | 0.255 | 0.649 | 1.543 | 0.004 | 0.003 | 4611.0 | 3131.0 | 1.0 |
| Covariance[0,1] | 0.098 | 0.153 | -0.200 | 0.381 | 0.002 | 0.002 | 4170.0 | 2693.0 | 1.0 |
| Covariance[1,0] | 0.098 | 0.153 | -0.200 | 0.381 | 0.002 | 0.002 | 4170.0 | 2693.0 | 1.0 |
| Covariance[1,1] | 1.069 | 0.253 | 0.641 | 1.543 | 0.004 | 0.003 | 4773.0 | 2454.0 | 1.0 |
# Posterior summaries of the Bayesian Pearson correlations
corr_results
| Covariate | Mean Pearson R | HDI 2.5% | HDI 97.5% | |
|---|---|---|---|---|
| 0 | age | -0.068663 | -0.325862 | 0.192112 |
| 1 | ue | 0.123430 | -0.130318 | 0.364564 |
| 2 | cd | 0.129612 | -0.112102 | 0.371485 |
| 3 | ian | -0.235338 | -0.463639 | 0.007773 |
| 4 | inc | 0.100247 | -0.145514 | 0.358393 |
| 5 | epqe | 0.187801 | -0.066380 | 0.421486 |
| 6 | epqp | -0.003490 | -0.257402 | 0.238553 |
| 7 | bastot | 0.120192 | -0.145908 | 0.359700 |
| 8 | nstot | 0.115017 | -0.135995 | 0.354311 |
| 9 | harmtot | 0.091003 | -0.175435 | 0.332204 |
# Frequentist counterpart: classical Pearson correlations with p-values
rhos = []
ps = []
for covariate in covariates:
    rho, p = pearsonr(performance['ratio_norm_corr'], performance[covariate])
    rhos.append(round(rho, 3))
    ps.append(round(p, 3))
# BUGFIX: the previous np.array([covariates, rhos, ps]).T coerced every column
# to a single (string) dtype, so 'Pearson R' and 'p-value' were stored as
# strings. Building the frame from a dict keeps each column's native dtype.
corr_results = pd.DataFrame(
    {'Covariate': covariates, 'Pearson R': rhos, 'p-value': ps}
)
corr_results
| Covariate | Pearson R | p-value | |
|---|---|---|---|
| 0 | age | -0.103 | 0.529 |
| 1 | ue | 0.178 | 0.271 |
| 2 | cd | 0.191 | 0.238 |
| 3 | ian | -0.335 | 0.035 |
| 4 | inc | 0.146 | 0.367 |
| 5 | epqe | 0.269 | 0.093 |
| 6 | epqp | -0.011 | 0.948 |
| 7 | bastot | 0.177 | 0.274 |
| 8 | nstot | 0.174 | 0.283 |
| 9 | harmtot | 0.134 | 0.41 |
# Interaction plot of accuracy across the 2x2 design (format x FnoA order).
# BUGFIX: the previous `plt.figure(figsize=(5, 5))` created an orphan empty
# figure (catplot is a figure-level function and always makes its own figure),
# which surfaced as "<Figure size 750x750 with 0 Axes>" in the output.
# Size the catplot directly instead (height=5, aspect=1 gives the intended
# 5x5-inch figure and matches catplot's defaults, so the plot is unchanged).
figure = sns.catplot(
    x='format',
    y='ratio_norm_corr',
    hue='fnoa_order',
    kind='point',
    data=performance,
    legend_out=True,
    linewidth=1,
    errwidth=1.5,
    palette='tab20',
    height=5,
    aspect=1
)
figure._legend.set_title('FnoA Order')
plt.xlabel('Format')
plt.ylabel('Ratio \n Normative Correct Responses')
plt.savefig('results//figures//exper_design.png', dpi=500, bbox_inches="tight")
<Figure size 750x750 with 0 Axes>
# Null (intercept-only) binomial model of correct-response counts,
# used as the comparison baseline for the full model below
null_model = build_logistic_model(df=performance, null_model=True)
with null_model:
    null_trace = pm.sample(
        cores=1,
        return_inferencedata=False,
        chains=4
    )
# we check if the gelman-rubin statistic is sensible
# and if the traceplots look reasonable
display(pm.summary(null_trace))
pm.plot_trace(null_trace)
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Intercept]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 5 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Intercept | 0.337 | 0.102 | 0.148 | 0.525 | 0.002 | 0.002 | 2028.0 | 2870.0 | 1.0 |
# Full binomial model: the sampled parameters (see output below) are an
# intercept plus format, order, and interaction slopes
logistic_model = build_logistic_model(df=performance, null_model=False)
with logistic_model:
    logistic_trace = pm.sample(
        cores=1,
        return_inferencedata=False,
        chains=4
    )
# Check convergence diagnostics (r_hat) and traceplots as before
display(pm.summary(logistic_trace))
pm.plot_trace(logistic_trace)
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Sequential sampling (4 chains in 1 job) NUTS: [Interaction Slope, Order Slope, Format Slope, Intercept]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 13 seconds.
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| Intercept | 0.080 | 0.202 | -0.280 | 0.478 | 0.006 | 0.004 | 1192.0 | 1807.0 | 1.00 |
| Format Slope | 0.331 | 0.292 | -0.185 | 0.904 | 0.009 | 0.006 | 1070.0 | 1577.0 | 1.00 |
| Order Slope | 0.331 | 0.289 | -0.212 | 0.863 | 0.008 | 0.006 | 1250.0 | 1471.0 | 1.01 |
| Interaction Slope | -0.265 | 0.408 | -1.022 | 0.481 | 0.012 | 0.008 | 1240.0 | 1497.0 | 1.00 |
# Compare null vs full model with ArviZ on the deviance scale
# (lower deviance = better predictive fit)
model_comp = az.compare(
    {
        "Null Model": null_trace,
        "Binomial Model": logistic_trace
    },
    scale='deviance'
)
# Plot out-of-sample deviance only (insample_dev=False) and save the figure
ax = az.plot_compare(model_comp, insample_dev=False)
plt.savefig('results//figures//model_comp_deviance.png', dpi=500)
C:\Users\penthotal\miniconda3\envs\bayes_env\lib\site-packages\arviz\stats\stats.py:145: UserWarning: The default method used to estimate the weights for each model,has changed from BB-pseudo-BMA to stacking warnings.warn( C:\Users\penthotal\miniconda3\envs\bayes_env\lib\site-packages\arviz\data\io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context. warnings.warn( C:\Users\penthotal\miniconda3\envs\bayes_env\lib\site-packages\arviz\data\io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context. warnings.warn(
# Frequentist check: binomial GLM on the proportion correct, weighting each
# subject by their number of valid trials (var_weights).
# Treatment coding sets 'Weather' / 'FnoA First' as the reference levels,
# matching the design cells summarised earlier.
# NOTE(review): presumably var_weights=total trials makes this equivalent to
# modelling the raw counts — confirm against statsmodels' Binomial docs.
mod = smf.glm(
    "ratio_norm_corr ~ C(format, Treatment(reference='Weather')) * C(fnoa_order, Treatment(reference='FnoA First'))",
    family=sm.families.Binomial(),
    data=performance,
    var_weights=performance['total_norm_corr'].values
).fit()
mod.summary()
| Dep. Variable: | ratio_norm_corr | No. Observations: | 40 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 36 |
| Model Family: | Binomial | Df Model: | 3 |
| Link Function: | logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -186.87 |
| Date: | Mon, 19 Apr 2021 | Deviance: | 50.395 |
| Time: | 15:12:09 | Pearson chi2: | 44.7 |
| No. Iterations: | 3 | ||
| Covariance Type: | nonrobust |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 0.0800 | 0.200 | 0.400 | 0.689 | -0.312 | 0.472 |
| C(format, Treatment(reference='Weather'))[T.Disease] | 0.3254 | 0.286 | 1.138 | 0.255 | -0.235 | 0.886 |
| C(fnoa_order, Treatment(reference='FnoA First'))[T.FnoA Second] | 0.3254 | 0.286 | 1.138 | 0.255 | -0.235 | 0.886 |
| C(format, Treatment(reference='Weather'))[T.Disease]:C(fnoa_order, Treatment(reference='FnoA First'))[T.FnoA Second] | -0.2576 | 0.408 | -0.632 | 0.527 | -1.056 | 0.541 |